Scoring signaling pathway activity

loading packages and data

In this tutorial, we utilize the GSE154109 dataset, which contains single-cell RNA sequencing (scRNA-seq) data from acute myeloid leukemia (AML) patients, to demonstrate how dimension reduction enhances cytokine activity inference. Cell type annotations were obtained from the TISCH2 database.

object <- readRDS('data/AML_GSE154109.RDS')
ann <- read.table('data/AML_GSE154109_CellMetainfo_table.tsv', row.names = 1, header = TRUE, sep = '\t')
object$Sample <- ann[Cells(object), 'Sample']
object$celltype <- ann[Cells(object), 'Celltype..major.lineage.']
head(object[[]])
                                    orig.ident nCount_RNA
P1@AAATGCCAGACTAGAT-1 AML_GSE154109_expression       5715
P1@AAGGTTCTCAACGGGA-1 AML_GSE154109_expression       2676
P1@ACACCAAGTACCGGCT-1 AML_GSE154109_expression       7047
P1@ACATCAGGTTTAAGCC-1 AML_GSE154109_expression       4330
P1@ACTTGTTGTGGCGAAT-1 AML_GSE154109_expression      12116
P1@AGCGTATAGAGTAATC-1 AML_GSE154109_expression       2879
                      nFeature_RNA Sample celltype
P1@AAATGCCAGACTAGAT-1         1399   AML1        B
P1@AAGGTTCTCAACGGGA-1          932   AML1        B
P1@ACACCAAGTACCGGCT-1         1931   AML1        B
P1@ACATCAGGTTTAAGCC-1         1356   AML1        B
P1@ACTTGTTGTGGCGAAT-1         2049   AML1        B
P1@AGCGTATAGAGTAATC-1         1037   AML1        B

Data pre-processing

We aimed to demonstrate that dimension reduction techniques could improve the inference of cytokine activity. In this study, we implemented three distinct dimension reduction methods: Principal Component Analysis (PCA), Non-negative Matrix Factorization (NMF), and Multiple Correspondence Analysis (MCA).

gs <- readGMT('data/CD8_Tcells_activation.gmt')
grate <- getGeneRate(background.geneset = 'data/Tres.kegg', signature.geneset = gs, mode = "multiple")
head(grate)
       background signature
ACSS2 0.016129032         0
GCK   0.037634409         0
PGK2  0.005376344         0
PGK1  0.005376344         0
PDHB  0.026881720         0
PDHA1 0.026881720         0
object <- rebuildMatrix(object, features = NULL, assay = "RNA", slot = "data", method = 'pca', dim = 20, avc = NULL)
object <- rebuildMatrix(object, features = NULL, assay = "RNA", slot = "data", method = 'nmf', dim = 20, avc = NULL)

iter |      tol 
---------------
   1 | 8.97e-01
   2 | 9.01e-02
   3 | 2.32e-02
   4 | 1.01e-02
   5 | 5.77e-03
   6 | 3.81e-03
   7 | 2.77e-03
   8 | 2.20e-03
   9 | 1.87e-03
  10 | 1.67e-03
  11 | 1.53e-03
  12 | 1.39e-03
  13 | 1.21e-03
  14 | 1.01e-03
  15 | 8.41e-04
  16 | 7.15e-04
  17 | 6.37e-04
  18 | 6.07e-04
  19 | 6.11e-04
  20 | 6.08e-04
  21 | 5.69e-04
  22 | 5.00e-04
  23 | 4.33e-04
  24 | 3.73e-04
  25 | 3.27e-04
  26 | 2.91e-04
  27 | 2.59e-04
  28 | 2.30e-04
  29 | 2.03e-04
  30 | 1.81e-04
  31 | 1.63e-04
  32 | 1.48e-04
  33 | 1.32e-04
  34 | 1.18e-04
  35 | 1.08e-04
  36 | 9.96e-05
  37 | 9.23e-05
  38 | 8.54e-05
  39 | 7.88e-05
  40 | 7.36e-05
  41 | 6.92e-05
  42 | 6.55e-05
  43 | 6.18e-05
  44 | 5.89e-05
  45 | 5.60e-05
  46 | 5.36e-05
  47 | 5.15e-05
  48 | 4.96e-05
  49 | 4.82e-05
  50 | 4.70e-05
  51 | 4.59e-05
  52 | 4.48e-05
  53 | 4.35e-05
  54 | 4.18e-05
  55 | 4.03e-05
  56 | 3.88e-05
  57 | 3.72e-05
  58 | 3.56e-05
  59 | 3.38e-05
  60 | 3.19e-05
  61 | 3.02e-05
  62 | 2.85e-05
  63 | 2.67e-05
  64 | 2.50e-05
  65 | 2.34e-05
  66 | 2.19e-05
  67 | 2.06e-05
  68 | 1.93e-05
  69 | 1.80e-05
  70 | 1.68e-05
  71 | 1.57e-05
  72 | 1.47e-05
  73 | 1.38e-05
  74 | 1.29e-05
  75 | 1.21e-05
  76 | 1.13e-05
  77 | 1.06e-05
  78 | 9.96e-06
object <- rebuildMatrix(object, features = NULL, assay = "RNA", slot = "data", method = 'mca', dim = 20, avc = NULL)
10.777 sec elapsed
234.672 sec elapsed
19.357 sec elapsed

Cytokine signaling activity

To infer Cytokine signaling activity, a model matrix file that quantifies the relative changes in gene expression is required. Ridge regression is then applied, using gene expression as the dependent variable (Y) and the model matrix as the independent variable (X). The activity of each cytokine is quantified as the ratio of the regression coefficient to its standard error. This methodological approach was first introduced in CytoSig. SaaSc is capable of utilizing the model matrix file from CytoSig as well as those from alternative sources.

Scoring

res <- list()
for(method in list(NULL, 'nmf', 'pca', 'mca')){
  object@reductions[['active.method']] <- method
  Tscore <- computeResponse(object, gene.rate = grate, celltype = 'CD8T', signature = 'Tcell_activation')
  Signaling <- computeSignaling(object, model.file = 'data/signature.centroid.expand', celltype = 'CD8T', cytokine = NULL,
                              lambda = 10000, num.permutations = 0, test.method = "two-sided")[,'TGFB1', drop = FALSE]
  res <- append(res, list(merge(Tscore, Signaling, by = "row.names")))
}
names(res) <- c('none', 'nmf', 'pca', 'mca')
head(res[['mca']])
              Row.names Tcell_activation        TGFB1
1 P1@AACCGCGGTGATGTGG-1       -0.4996827  0.605949398
2 P1@AACGTTGGTGACTACT-1       -2.5030458  1.094231813
3 P1@AAGACCTGTCTTTCAT-1       -0.5182805  0.004603452
4 P1@ACTGTCCCACACGCTG-1        0.4594864  0.665623230
5 P1@AGAGTGGCAAGCGTAG-1       -1.9412061  1.417185743
6 P1@AGAGTGGTCCAGGGCT-1        0.6585127 -0.008667152

Performance comparison

Benchmarking the performance of SaaSc for cytokine activity inference presents significant challenges. It is widely accepted in the scientific community that CD8 T cell activation negatively correlates with TGFβ1 cytokine activity. This relationship has been previously utilized in the Tres method to identify immune-response related genes in T cells. Therefore, a stronger negative correlation between inferred TGFβ1 activity and CD8 T cell activation scores serves as an indicator of improved performance.

title <- paste0(names(res), ' (cor: ', signif(sapply(res, function(x) cor(x[,2], x[,3])),2), ')')
plot_list <- list()

for(i in 1:length(res)) {
  plot_list[[i]] <- ggplot(res[[i]], aes(y = Tcell_activation, x = TGFB1)) +
    geom_point(alpha = 0.7, color = "blue") + 
    geom_smooth(method = "lm", color = "red", se = TRUE) + 
    ggtitle(title[i]) + theme_bw() + 
    theme(
      plot.title = element_text(hjust = 0.5, size = 12),
      axis.title = element_text(size = 10),
      axis.text = element_text(size = 8),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank()
    )
}
combined_plot <- plot_list[[1]] + plot_list[[2]] + plot_list[[3]] + plot_list[[4]] +
  plot_layout(nrow = 2, ncol = 2)

print(combined_plot)

As demonstrated above, dimension reduction using MCA significantly enhanced the performance of cytokine activity inference, yielding a Pearson correlation coefficient (PCC) of -0.44, compared to 0.073 when no dimension reduction was applied.

Signalling pathway activities

A conventional approach for pathway activity inference relies on gene set scoring. However, this method has a significant limitation: most gene sets only include pathway member genes while excluding downstream target genes, making them inadequate for accurately assessing signaling pathway activity. Recognizing this gap, the SEED2 database has systematically compiled molecular signatures across 16 distinct signaling pathways. These signatures capture the specific gene expression alterations that occur when the pathway of interest is perturbed, thereby providing a more comprehensive representation of pathway dynamics. As a result, these curated signatures can be directly implemented in SaaSc to generate robust and reliable inferences of signaling pathway activities, offering researchers a more accurate tool for pathway functional assessment in single-cell.

Scoring

# code here
strsplit(readLines('data/SPEED2_signaling.tsv', n = 1), '\t')[[1]]
 [1] "Estrogen" "H2O2"     "Hippo"    "Hypoxia"  "IL-1"     "Insulin" 
 [7] "JAK-STAT" "MAPK"     "TLR"      "Notch"    "p53"      "PI3K"    
[13] "PPAR"     "RAR"      "TGFb"     "TNFa"     "Trail"    "VEGF"    
[19] "Wnt"     
object@reductions[['active.method']] <- 'mca'
Signaling <- computeSignaling(object, model.file = 'data/SPEED2_signaling.tsv', celltype = NULL, cytokine = NULL,
                            lambda = 10000, num.permutations = 0, test.method = "two-sided")

As an example, we show the distribution of Wnt pathway activity in each cell types:

Wnt <- aggregate(Signaling[,'Wnt'], by = list(object$celltype), FUN='median') %>% 
  setNames(c('cell', 'median')) %>% arrange(desc(median))
Wnt$color <- ifelse(Wnt$median > 0, "Positive", "Negative")
Wnt$cell <- factor(Wnt$cell, levels = Wnt$cell)

ggplot(Wnt, aes(x = cell, y = median, fill = color)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c("Positive" = "red", "Negative" = "blue")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = 'top',
    legend.title = element_blank()
  ) +
  labs(
    title = "",
    x = "Cell Type",
    y = "Wnt pathway activity"
  ) 

As shown above, ‘Wnt pathway’ is activated in ‘Malignant’ cells with the highest activity.

sessionInfo

R version 4.4.3 (2025-02-28)
Platform: x86_64-apple-darwin20
Running under: macOS Ventura 13.7.4

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Asia/Shanghai
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods  
[7] base     

other attached packages:
 [1] dplyr_1.1.4        patchwork_1.3.0    ggplot2_3.5.1     
 [4] Matrix_1.7-3       ROCit_2.1.2        Seurat_5.2.1      
 [7] SeuratObject_5.0.2 sp_2.2-0           SaaSc_0.1.0       
[10] rmarkdown_2.29    

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.22            splines_4.4.3              
  [3] later_1.4.1                 tibble_3.2.1               
  [5] polyclip_1.10-7             fastDummies_1.7.5          
  [7] lifecycle_1.0.4             globals_0.16.3             
  [9] lattice_0.22-6              MASS_7.3-64                
 [11] magrittr_2.0.3              plotly_4.10.4              
 [13] sass_0.4.9                  jquerylib_0.1.4            
 [15] yaml_2.3.10                 httpuv_1.6.15              
 [17] sctransform_0.4.1           askpass_1.2.1              
 [19] spam_2.11-1                 spatstat.sparse_3.1-0      
 [21] reticulate_1.41.0.1         cowplot_1.1.3              
 [23] pbapply_1.7-2               RColorBrewer_1.1-3         
 [25] abind_1.4-8                 zlibbioc_1.52.0            
 [27] Rtsne_0.17                  GenomicRanges_1.58.0       
 [29] purrr_1.0.4                 downlit_0.4.4              
 [31] BiocGenerics_0.52.0         GenomeInfoDbData_1.2.13    
 [33] IRanges_2.40.1              S4Vectors_0.44.0           
 [35] ggrepel_0.9.6               irlba_2.3.5.1              
 [37] listenv_0.9.1               spatstat.utils_3.1-2       
 [39] umap_0.2.10.0               goftest_1.2-3              
 [41] RSpectra_0.16-2             spatstat.random_3.3-2      
 [43] fitdistrplus_1.2-2          parallelly_1.42.0          
 [45] codetools_0.2-20            DelayedArray_0.32.0        
 [47] scuttle_1.16.0              tidyselect_1.2.1           
 [49] UCSC.utils_1.2.0            distill_1.6                
 [51] farver_2.1.2                viridis_0.6.5              
 [53] ScaledMatrix_1.14.0         matrixStats_1.5.0          
 [55] stats4_4.4.3                spatstat.explore_3.3-4     
 [57] jsonlite_1.9.1              BiocNeighbors_2.0.1        
 [59] progressr_0.15.1            ggridges_0.5.6             
 [61] survival_3.8-3              scater_1.34.1              
 [63] tictoc_1.2.1                tools_4.4.3                
 [65] ica_1.0-3                   Rcpp_1.0.14                
 [67] glue_1.8.0                  gridExtra_2.3              
 [69] SparseArray_1.6.2           mgcv_1.9-1                 
 [71] xfun_0.51                   MatrixGenerics_1.18.1      
 [73] GenomeInfoDb_1.42.3         withr_3.0.2                
 [75] fastmap_1.2.0               fansi_1.0.6                
 [77] openssl_2.3.2               RcppML_0.3.7               
 [79] digest_0.6.37               rsvd_1.0.5                 
 [81] R6_2.6.1                    mime_0.12                  
 [83] colorspace_2.1-1            scattermore_1.2            
 [85] tensor_1.5                  spatstat.data_3.1-4        
 [87] tidyr_1.3.1                 generics_0.1.3             
 [89] data.table_1.17.0           httr_1.4.7                 
 [91] htmlwidgets_1.6.4           S4Arrays_1.6.0             
 [93] uwot_0.2.3                  pkgconfig_2.0.3            
 [95] gtable_0.3.6                lmtest_0.9-40              
 [97] SingleCellExperiment_1.28.1 XVector_0.46.0             
 [99] htmltools_0.5.8.1           dotCall64_1.2              
[101] bookdown_0.42               fgsea_1.32.0               
[103] scales_1.3.0                Biobase_2.66.0             
[105] png_0.1-8                   spatstat.univar_3.1-2      
[107] knitr_1.50                  rstudioapi_0.17.1          
[109] reshape2_1.4.4              nlme_3.1-167               
[111] cachem_1.1.0                zoo_1.8-13                 
[113] stringr_1.5.1               KernSmooth_2.23-26         
[115] vipor_0.4.7                 parallel_4.4.3             
[117] miniUI_0.1.1.1              pillar_1.10.1              
[119] grid_4.4.3                  vctrs_0.6.5                
[121] RANN_2.6.2                  promises_1.3.2             
[123] BiocSingular_1.22.0         beachmat_2.22.0            
[125] xtable_1.8-4                cluster_2.1.8              
[127] beeswarm_0.4.0              CelliD_1.14.0              
[129] evaluate_1.0.3              cli_3.6.4                  
[131] compiler_4.4.3              rlang_1.1.5                
[133] crayon_1.5.3                future.apply_1.11.3        
[135] labeling_0.4.3              RcppArmadillo_14.2.3-1     
[137] plyr_1.8.9                  ggbeeswarm_0.7.2           
[139] stringi_1.8.4               viridisLite_0.4.2          
[141] deldir_2.0-4                BiocParallel_1.40.0        
[143] munsell_0.5.1               lazyeval_0.2.2             
[145] spatstat.geom_3.3-5         RcppHNSW_0.6.0             
[147] future_1.34.0               shiny_1.10.0               
[149] SummarizedExperiment_1.36.0 ROCR_1.0-11                
[151] fontawesome_0.5.3           igraph_2.1.4               
[153] memoise_2.0.1               bslib_0.9.0                
[155] fastmatch_1.1-6